In [1]:
import getCommitMessages as gcm
import numpy as np
import textblob
from datetime import datetime
from pytz import timezone
import matplotlib as mpl
from matplotlib import pyplot as plt
%matplotlib inline
In [2]:
# Path to the text file listing the GitHub repository links to analyse
# (presumably one URL per line -- confirm against the file itself).
textfile = "./github_links.txt"
# dtype=str is used instead of the "string" alias, which NumPy only
# understands on Python 2.  ndmin=1 keeps the result one-dimensional even
# when the file holds a single link, so iterating over repo_links always
# yields whole links rather than failing on a 0-d array.
repo_links = np.loadtxt(textfile, dtype=str, ndmin=1)
In [3]:
# Collect the commit history of every listed repository.
# Each entry of commit_infos is a (messages, times, user_repo) tuple.
commit_infos = []
for repo_url in repo_links:
    commit_messages, commit_times = gcm.getCommitInfo(repo_url)
    # Everything after the first 19 characters is kept as the repository
    # label (presumably this strips "https://github.com/" -- TODO confirm
    # that every link in the input file starts with that prefix).
    commit_infos.append((commit_messages, commit_times, repo_url[19:]))
In [4]:
# Flatten the per-repository data into parallel per-commit lists and compute
# one average sentiment score per repository.
all_commits = []   # every commit message, across all repositories
all_times = []     # commit timestamps, in the same order as all_commits
all_users = []     # repository label of each commit, same order as all_commits
all_feelings = []  # TextBlob polarity of each commit, same order as all_commits
repos_feels = []   # average polarity per repository, same order as commit_infos
for messages, times, user_repo in commit_infos:
    all_commits += messages
    all_times += times
    all_users += [user_repo] * len(messages)
    feelings = []
    for message in messages:
        feel = textblob.TextBlob(message).sentiment.polarity
        all_feelings.append(feel)
        # Merge commits still appear in the global lists above, but they are
        # left out of the repository's average.
        if not message.startswith("Merge"):
            feelings.append(feel)
    # A repository with no non-merge commits used to raise ZeroDivisionError.
    # It now scores 0.0 (presumably TextBlob's neutral polarity -- confirm
    # against the TextBlob documentation), which keeps repos_feels aligned
    # index-for-index with commit_infos.
    average_feels = sum(feelings) / len(feelings) if feelings else 0.0
    repos_feels.append(average_feels)
In [5]:
# Sanity check: display the sizes of the three per-commit lists, which are
# expected to match.
tuple(len(per_commit) for per_commit in (all_users, all_commits, all_feelings))
Out[5]:
In [6]:
# Report the single most positive commit across every repository.
# np.argmax yields the first index of the maximum, so ties resolve to the
# earliest entry in all_feelings.
ind = np.argmax(all_feelings)
user, feels, commit = all_users[ind], all_feelings[ind], all_commits[ind]
print("user %s had the happiest commit (%f): \n %s" % (user, feels, commit))
In [7]:
# Report the single most negative commit across every repository.
# np.argmin yields the first index of the minimum, so ties resolve to the
# earliest entry in all_feelings.
ind = np.argmin(all_feelings)
user, feels, commit = all_users[ind], all_feelings[ind], all_commits[ind]
print("user %s had the saddest commit (%f): \n %s" % (user, feels, commit))
In [8]:
# Report the repository (or repositories, when several tie) with the highest
# average sentiment, together with its most positive commit.
repo_scores = np.array(repos_feels)
# np.argmax returns one scalar index, which cannot be iterated over.  Collect
# every index that attains the maximum instead, so the loop below works and
# tied repositories are all reported.  An empty score list yields no indices.
if repo_scores.size:
    all_happiest = np.flatnonzero(repo_scores == repo_scores.max())
else:
    all_happiest = []
for happy in all_happiest:
    happy_repo_data = commit_infos[happy]
    happy_user = happy_repo_data[2]
    commits = happy_repo_data[0]
    if not commits:
        # Nothing to rank for a repository without any commits.
        continue
    # Polarity of every commit in this repository (merge commits included).
    happinesses = [textblob.TextBlob(commit).sentiment.polarity for commit in commits]
    which_happiest = np.argmax(happinesses)
    happiest_commit = commits[which_happiest]
    happiest_time = happy_repo_data[1][which_happiest]
    print("%s is the happiest. Their happiest commit is: \n %s" % (happy_user, happiest_commit))
In [ ]:
# Report the repository (or repositories, when several tie) with the lowest
# average sentiment, together with its most negative commit.
repo_scores = np.array(repos_feels)
# np.argmin returns one scalar index, which cannot be iterated over.  Collect
# every index that attains the minimum instead, so the loop below works and
# tied repositories are all reported.  An empty score list yields no indices.
if repo_scores.size:
    all_saddest = np.flatnonzero(repo_scores == repo_scores.min())
else:
    all_saddest = []
for sad in all_saddest:
    sad_repo_data = commit_infos[sad]
    sad_user = sad_repo_data[2]
    commits = sad_repo_data[0]
    if not commits:
        # Nothing to rank for a repository without any commits.
        continue
    # Polarity of every commit in this repository (merge commits included).
    sadnesses = [textblob.TextBlob(commit).sentiment.polarity for commit in commits]
    # The original line here had an unbalanced parenthesis (a SyntaxError).
    which_saddest = np.argmin(sadnesses)
    saddest_commit = commits[which_saddest]
    saddest_time = sad_repo_data[1][which_saddest]
    print("%s is the saddest. Their saddest commit is: \n %s" % (sad_user, saddest_commit))
In [ ]:
# Parse every commit timestamp (e.g. "2016-08-30T12:34:56Z") into a datetime,
# preserving the order of all_times.  The trailing "Z" is matched as a literal
# character, so the resulting datetimes carry no timezone information.
all_datetimes = [datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%SZ") for stamp in all_times]
In [ ]:
# Scatter each commit's sentiment against its timestamp for 2016-08-29 to
# 2016-09-03, with a dashed vertical line at every midnight in between.
fig, ax = plt.subplots(figsize=(13,8))
all_datenums = mpl.dates.date2num(all_datetimes)
ax.plot_date(all_datenums, all_feelings, ms=4)
# The axis limits and day markers go through date2num too, so they share the
# units of the plotted data.  datetime.toordinal() only coincides with
# Matplotlib date numbers under the old 0001-01-01 epoch; with the 1970-01-01
# epoch used by default since Matplotlib 3.3, ordinals land far outside the
# data and the plot appears empty.
window_start = mpl.dates.date2num(datetime(2016,8,29))
window_end = mpl.dates.date2num(datetime(2016,9,3))
ax.set_xlim([window_start, window_end])
day_boundaries = [
    datetime(2016,8,30),
    datetime(2016,8,31),
    datetime(2016,9,1),
    datetime(2016,9,2),
]
for midnight in day_boundaries:
    vldate = mpl.dates.date2num(midnight)
    # Spans -1..1 on the y-axis (the same range the time-of-day plot uses).
    ax.plot((vldate,vldate),(-1,1),'k--')
ax.set_xlabel("Time",fontsize=20)
ax.set_ylabel("Happiness",fontsize=20)
plt.gcf().autofmt_xdate()
In [ ]:
# Fold every commit onto a single 24-hour axis and scatter sentiment against
# the time of day at which the commit was made.
seconds_in_day = 24*3600
# Reference midnight.  Only the remainder modulo one day is used below, so
# any midnight would give the same result.
standard = datetime(2016,8,28)
# Timezone-aware alternative that was left disabled (US/Eastern):
#standard = timezone('US/Eastern').localize(datetime(2016,8,28))
#all_datesecs = [timezone('US/Eastern').localize(i) - standard for i in all_datetimes]
all_datesecs = []  # offset of each commit from the reference midnight
all_seconds = []   # seconds elapsed since midnight for each commit
for moment in all_datetimes:
    offset = moment - standard
    all_datesecs.append(offset)
    all_seconds.append(offset.total_seconds() % seconds_in_day)

fig2, ax2 = plt.subplots(figsize=(12, 8))
ax2.scatter(all_seconds, all_feelings, s=18, color="#AA0000")
ax2.set_xlabel("Time of day (seconds)", fontsize=20)
ax2.set_ylabel("Happiness", fontsize=20)
ax2.set_xlim(0, seconds_in_day)
# Kept as the final expression so the cell's displayed output is unchanged.
ax2.set_ylim(-1, 1)
In [ ]: